{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "# pandas 数据清洗和分析工作更快更简单的数据结构和操作工具\n",
    "# 数值计算 numpy scipy\n",
    "# 分析库 statsmodels scikit-learn\n",
    "# 数据可视化库 matplotlib\n",
    "\n",
    "# pandas 基于 numpy 数组构建的，特别基于数组的函数和不使用 for 循环的数据处理\n",
    "# numpy 适用于处理统一的数值型数组数据\n",
    "# pandas 适用于处理表格式或者非结构化式的高维数据\n",
    "\n",
    "import pandas as pd\n",
    "\n",
    "# 因为 Series 和 DataFrame 使用非常频繁，故此将其引入本地命名空间中更加方便\n",
    "from pandas import Series, DataFrame"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    4\n",
       "1    7\n",
       "2   -5\n",
       "3    3\n",
       "dtype: int64"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Series 类似 一维数组的对象 \n",
    "# 由一组数据 和 一组与之相关的数据标签（索引）构成\n",
    "obj = pd.Series([4, 7, -5, 3])\n",
    "obj"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([ 4,  7, -5,  3], dtype=int64)"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Series 表现形式：索引在左 ， 值在右 \n",
    "# 若未指定数据的标签索引，则自动创建从 0 - N-1 的整数索引\n",
    "# 通过 Series 的 values 属性获取 数值，以数组形式返回\n",
    "# 通过 Series 的 index 属性获取 索引对象，以数组形式返回\n",
    "obj.values"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "RangeIndex(start=0, stop=4, step=1)"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "obj.index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "d    4\n",
       "b    7\n",
       "a   -5\n",
       "c    3\n",
       "dtype: int64"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 创建 Series 带有数据标签的一维数据\n",
    "obj2 = pd.Series([4, 7, -5, 3], index=[\"d\", \"b\", \"a\", \"c\"])\n",
    "obj2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['d', 'b', 'a', 'c'], dtype='object')"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "obj2.index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "-5"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Series 通过数据标签索引访问数据\n",
    "obj2[\"a\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "c    3\n",
       "a   -5\n",
       "d    6\n",
       "dtype: int64"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "obj2[\"d\"] = 6\n",
    "\n",
    "obj2[[\"c\", \"a\", \"d\"]]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "d    6\n",
      "b    7\n",
      "c    3\n",
      "dtype: int64\n",
      "d    12\n",
      "b    14\n",
      "a   -10\n",
      "c     6\n",
      "dtype: int64\n",
      "d     403.428793\n",
      "b    1096.633158\n",
      "a       0.006738\n",
      "c      20.085537\n",
      "dtype: float64\n"
     ]
    }
   ],
   "source": [
    "# 使用NumPy函数或类似NumPy的运算， 都会保留索引值的链接\n",
    "# 根据布尔型数组进行过滤、 标量乘法、 应用数学函数\n",
    "import numpy as np\n",
    "\n",
    "print(obj2[obj2 > 0])\n",
    "print(obj2 * 2)\n",
    "print(np.exp(obj2))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 将Series看成是一个定长的有序字典， 因为它是索引值到数据值的一个映射\n",
    "# 它可以用在许多原本需要字典参数的函数中：\n",
    "'b' in obj2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "False"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "'e' in obj2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Ohio      35000\n",
       "Texas     71000\n",
       "Oregon    16000\n",
       "Utah       5000\n",
       "dtype: int64"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 数据被存放在一个Python字典中， 也可以直接通过这个字典来创建Series\n",
    "data_dict = {'Ohio': 35000, 'Texas': 71000, 'Oregon': 16000, 'Utah': 5000}\n",
    "obj3 = pd.Series(data_dict)\n",
    "obj3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "California        NaN\n",
       "Ohio          35000.0\n",
       "Oregon        16000.0\n",
       "Texas         71000.0\n",
       "dtype: float64"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 如果只传入一个字典， 则结果Series中的索引就是原字典的键（有序排列）\n",
    "# 可以传入排好序的字典的键以改变顺序\n",
    "states = ['California', 'Ohio', 'Oregon', 'Texas']\n",
    "obj4 = pd.Series(data_dict, index=states)\n",
    "obj4"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "California     True\n",
       "Ohio          False\n",
       "Oregon        False\n",
       "Texas         False\n",
       "dtype: bool"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# NaN : not a number\n",
    "# pandas 表示缺失值 或者 NA 值\n",
    "# pandas 的 isnull 和 notnull 函数可以检测缺失数据\n",
    "pd.isnull(obj4)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "California    False\n",
       "Ohio           True\n",
       "Oregon         True\n",
       "Texas          True\n",
       "dtype: bool"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.notnull(obj4)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "California     True\n",
      "Ohio          False\n",
      "Oregon        False\n",
      "Texas         False\n",
      "dtype: bool\n",
      "=====================\n",
      "California    False\n",
      "Ohio           True\n",
      "Oregon         True\n",
      "Texas          True\n",
      "dtype: bool\n"
     ]
    }
   ],
   "source": [
    "# Series 的实例化对象 一样有着相同功能的方法\n",
    "print(obj4.isnull())\n",
    "print(\"=====================\")\n",
    "print(obj4.notnull())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "state\n",
       "California        NaN\n",
       "Ohio          35000.0\n",
       "Oregon        16000.0\n",
       "Texas         71000.0\n",
       "Name: population, dtype: float64"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Series 的重要功能是 根据运算的索引标签自动对齐数据\n",
    "# 以上的输出结果 可以明显的表现出这一点重要功能：数据对齐功能 \n",
    "\n",
    "# Series 对象本身 和 其索引 都有一个 name 属性\n",
    "# 该属性与 pandas 其他关键功能非常密切\n",
    "obj4.name = \"population\"\n",
    "obj4.index.name = \"state\"\n",
    "obj4"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    4\n",
       "1    7\n",
       "2   -5\n",
       "3    3\n",
       "dtype: int64"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Series 的索引可以通过赋值的方式就地修改\n",
    "obj"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Bob      4\n",
       "Steve    7\n",
       "Jeff    -5\n",
       "Ryan     3\n",
       "dtype: int64"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "obj.index = ['Bob', 'Steve', 'Jeff', 'Ryan']\n",
    "obj"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
